********************************************************************************
*PROJECT: Legislature Integration and Bipartisanship: A Natural Experiment in Iceland								   
*PURPOSE: Prep voting data for dyadic regressions
********************************************************************************

// ---------------------------------------------------------------------
// Session setup: reset Stata, define folder locals and tempfile names,
// and start timer 1 (stopped and listed at the end of the script).
//
// All folder locals are built from the global path_pch, which this
// do-file never defines, so it must be set before the file is run.
//
// Paths use forward slashes: Stata accepts them on every platform,
// whereas backslashes only work on Windows. The rest of the file already
// appends file names with "/" in most places.
// ---------------------------------------------------------------------
clear all
set more off
pause on
local filefolder = "$path_pch/Data/raw/Voting"
local cosponfolder = "$path_pch/Data/raw/Co-Sponsorship"
local seatingfolder = "$path_pch/Data/intermediate/Seating"
local votingfolder = "$path_pch/Data/raw/Voting"
local votingintfolder = "$path_pch/Data/intermediate/Voting"
local votinganalyfolder = "$path_pch/Data/analysis/Voting"
local tablefolder = "$path_pch/Output/Tables"
local graphfolder = "$path_pch/Output/Graphs"
local mpfolder = "$path_pch/Data/raw/MP"

// Tempfile handles used throughout the script. The full original list is
// kept (including names such as yob and yob_d that are not referenced in
// the visible part of the file) so that nothing downstream loses a handle.
tempfile billtype voteData voteDataLag voteDataLead MPData MPData_d yob holding using ///
	yob_d VS_Vote_p50 VS_Vote_p25 voting_MP_dyad_p25 voting_MP_dyad_p50 ///
	first_half voting_MP_dyad_first0 voting_MP_dyad_first1 voteCalendar ///
	voting_MP_dyad_busy0 voting_MP_dyad_busy1 voting_MP_dyad_frv ///
	voting_MP_dyad_psk voting_MP_dyad_til voting_MP_dyad_brt voting_MP_dyad_rest
timer clear
timer on 1


*********************************************************************
// Create dyadic voting data, save (comment out after saving)
*********************************************************************
* Objective: create pair-bill level dyads with MP_id, MP_id_d, vote, vote_d, vote_id, session_id, bill_id

// Build a vote_id-level lookup holding the bill type and the *_cat
// bill-category variables, saved to the billtype tempfile.
insheet using "`votingfolder'/vote_info_20210626.csv", comma clear
keep session_id bill_id vote_id billtype
isid vote_id		// one row per vote is required for the later m:1 merges
// bill categories are stored per (session_id, bill_id); votes without a
// category row are kept with missing *_cat values
merge m:1 session_id bill_id using "`votingintfolder'/bill_type/bill_cat_clean", ///
	keep(master match) nogenerate
keep vote_id billtype *_cat
save `billtype'

// Load the voting analysis file (created by Saia_prep)
use "`votinganalyfolder'/voting_analysis", clear

// Vote calendar: for every vote_id, count how many votes share its date
// and flag days with 50 or more votes. The loaded data are left untouched
// (preserve/restore) because the dyads are built from them next.
preserve
	keep vote_id votedate
	duplicates drop
	isid vote_id		// each vote must map to exactly one date
	bysort votedate: generate NvotesToday = _N
	generate busyDay = NvotesToday>=50
	label variable busyDay "=1 if at least 50 votes today"
	keep vote_id NvotesToday busyDay
	save `voteCalendar'
restore


// Create vote-level dyads without MP info.
// Each MP's vote on a roll call is paired with every MP's vote on the same
// roll call by joining the data to a renamed copy of itself.
keep vote_id session_id bill_id vote MP_id
preserve
	rename vote vote_d
	rename MP_id MP_id_d
	save `using'
restore
joinby vote_id session_id bill_id using `using'
	// This might be too big to handle.
	// Another way to do this is doing one observation at a time.

// joinby returns every ORDERED pair plus each MP paired with itself.
// Every downstream step collapses by (MP_id, MP_id_d) and then keeps only
// MP_id < MP_id_d, and all outcome measures are symmetric in the two MPs,
// so dropping the redundant rows here leaves the final output unchanged
// while cutting the dyad table to less than half its size.
keep if MP_id < MP_id_d

tempfile voting_MP_vote_dyad
save `voting_MP_vote_dyad', replace


*********************************************************************
// Prep Voting Data (dyads; including lead and lag)
// (Outcome variables: (i) fraction of identical votes (yes/no/abstain)
// (ii) average differences (yes = 1, abstain = 0, no = -1)
// (Comment out after saving)
*********************************************************************

// Consensus votes vs. nonconsensus votes (comment out after saving voting_MP_dyad.dta)

// Bring voting data created from Saia_prep
use "`votinganalyfolder'/voting_analysis", clear

// Calculate modal vote: size of the largest vote category within each vote
bys vote_id vote: gen NumVote = _N
bys vote_id: egen ModalVoteCount = max(NumVote)
bys vote_id: gen totalvote = _N

// Reduce to one row per vote and compute the share of the modal category
keep vote_id ModalVoteCount totalvote first_half
duplicates drop
gen ModalVoteShare = ModalVoteCount / totalvote
sum ModalVoteShare, d

// Store the percentiles immediately. keep/drop are r-class commands, so
// the first pass through the loop below would otherwise wipe r(p25), and
// the second pass would compare against missing and keep every vote.
// Temp scalars (rather than locals) preserve full double precision for the
// strict "<" comparison.
tempname cut_p50 cut_p25
scalar `cut_p50' = r(p50)
scalar `cut_p25' = r(p25)

// Lists of contentious votes: modal share strictly below the median (p50)
// or below the first quartile (p25)
foreach p in p50 p25 {
	preserve
		keep if ModalVoteShare < `cut_`p''
		keep vote_id
		dis _N
		save `VS_Vote_`p''
	restore
}

// save first vs. second half indicator
keep vote_id first_half
save `first_half'

// Reload the vote-level dyads and attach vote-level attributes:
// first-half indicator, same-day vote counts, and bill type / categories
use `voting_MP_vote_dyad', clear
merge m:1 vote_id using `first_half', assert(match) nogenerate
merge m:1 vote_id using `voteCalendar', assert(match) nogenerate
merge m:1 vote_id using `billtype', keep(master match)
// every vote must have bill-type info, with one known exception:
assert _merge==3 if vote_id!=37680 // billtype missing for 37680 (https://www.althingi.is/thingstorf/thingmalin/atkvaedagreidsla/?nnafnak=37680)
drop _merge

// Pair-vote level outcomes (collapsed to pair-session means further below).
// vote and vote_d take the string values "yes", "no", "abstain", "absent".

// Same-vote indicators
generate samevote4c = vote == vote_d	// identical on all four categories
// three categories: absent and abstain are treated as one category
generate samevote3c = (vote == vote_d) | (inlist(vote,"absent","abstain") & inlist(vote_d,"absent","abstain"))
// both yes or both no
generate samevotesc = (vote == vote_d) & inlist(vote,"yes","no")

// Distance on ordinal scales
// 4 cat: yes=3, absent=2, abstain=1, no=0
generate votediff4c = abs(3*((vote=="yes")-(vote_d=="yes")) + 2*((vote=="absent")-(vote_d=="absent")) + ((vote=="abstain")-(vote_d=="abstain")))
// 3 cat: yes=1, no=-1, anything else=0
generate votediff3c = abs(((vote=="yes")-(vote=="no")) - ((vote_d=="yes")-(vote_d=="no")))

// Alternative outcomes for robustness
// same vote among votes actually cast (yes/no/abstain)
generate samevotePresent = (vote == vote_d) & inlist(vote,"yes","no","abstain")
// both present or both absent
generate samePresentAbsent = (vote=="absent") == (vote_d=="absent")
// 4 cat with abstain and absent swapped: yes=3, abstain=2, absent=1, no=0
generate votediff4cFlip = abs(3*((vote=="yes")-(vote_d=="yes")) + 2*((vote=="abstain")-(vote_d=="abstain")) + ((vote=="absent")-(vote_d=="absent")))
// samevote4c, undefined when either MP was absent
generate samevote4cNoAbsent = samevote4c if vote!="absent" & vote_d!="absent"

// Similarity = maximum possible distance minus the distance
generate voteSim4c = 3 - votediff4c
generate voteSim3c = 2 - votediff3c
generate voteSim4cFlip = 3 - votediff4cFlip

// Pair-session measures on contentious votes only:
//   p50 list -> suffix _VS  (contentious, p50 ~= .66)
//   p25 list -> suffix _VVS (very contentious, p25 ~= .52)
local measures samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent
foreach p in p25 p50 {
	local disc = cond("`p'"=="p25", "VVS", "VS")
	preserve
		// keep only votes on the contentious list
		merge m:1 vote_id using `VS_Vote_`p'', nogenerate keep(match)
		drop vote_id vote vote_d // drop unnecessary variables
		// stage 1: mean within bill, so that each bill gets the same weight
		collapse (mean) `measures', by(session_id bill_id MP_id MP_id_d)
		drop bill_id
		// stage 2: mean across bills -> one row per session and MP pair
		collapse (mean) `measures', by(session_id MP_id MP_id_d)
		keep if MP_id < MP_id_d
		foreach var of varlist `measures' {
			rename `var' `var'_`disc'
		}
		save `voting_MP_dyad_`p''
	restore
}

// Pair-session measures split by the first_half indicator:
//   first_half==1 -> suffix _1st, first_half==0 -> suffix _2nd
local measures samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent
forvalues k = 0/1 {
	local half = cond(`k'==0, "2nd", "1st")
	preserve
		keep if first_half==`k'
		drop vote_id vote vote_d // drop unnecessary variables
		// stage 1: mean within bill, so that each bill gets the same weight
		collapse (mean) `measures', by(session_id bill_id MP_id MP_id_d)
		drop bill_id
		// stage 2: mean across bills -> one row per session and MP pair
		collapse (mean) `measures', by(session_id MP_id MP_id_d)
		keep if MP_id < MP_id_d
		foreach var of varlist `measures' {
			rename `var' `var'_`half'
		}
		save `voting_MP_dyad_first`k''
	restore
}

// Pair-session measures split by busyDay (at least 50 votes that day):
//   busyDay==1 -> suffix _busy, busyDay==0 -> suffix _lite
local measures samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent
forvalues k = 0/1 {
	local load = cond(`k'==0, "lite", "busy")
	preserve
		keep if busyDay==`k'
		drop vote_id vote vote_d // drop unnecessary variables
		// stage 1: mean within bill, so that each bill gets the same weight
		collapse (mean) `measures', by(session_id bill_id MP_id MP_id_d)
		drop bill_id
		// stage 2: mean across bills -> one row per session and MP pair
		collapse (mean) `measures', by(session_id MP_id MP_id_d)
		keep if MP_id < MP_id_d
		foreach var of varlist `measures' {
			rename `var' `var'_`load'
		}
		save `voting_MP_dyad_busy`k''
	restore
}

// Pair-session measures by bill type. The two lists are parallel:
// position i of typecodes is the billtype value, position i of typelabs is
// the variable suffix / tempfile tag. The last entry (NA -> rest) collects
// every vote whose billtype is none of the four named types.
local typecodes "Frv. Þskj. Till. Brtt. NA"
local typelabs "frv psk til brt rest"
local measures samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent
forvalues i = 1/5 {
	local x : word `i' of `typecodes'
	local type : word `i' of `typelabs'

	preserve
		if `i'<=4 {
			keep if billtype=="`x'"
		}
		else {
			keep if !inlist(billtype,"Frv.","Þskj.","Till.","Brtt.")
		}
		drop vote_id vote vote_d // drop unnecessary variables
		// stage 1: mean within bill, so that each bill gets the same weight
		collapse (mean) `measures', by(session_id bill_id MP_id MP_id_d)
		drop bill_id
		// stage 2: mean across bills -> one row per session and MP pair
		collapse (mean) `measures', by(session_id MP_id MP_id_d)
		keep if MP_id < MP_id_d
		foreach var of varlist `measures' {
			rename `var' `var'_`type'
		}
		save `voting_MP_dyad_`type''
	restore
}

// Pair-session measures by bill category, computed on contentious votes
// only (votes on the p50 list). One tempfile per category, suffix _<category>.
foreach x in industry foreign econ health law educ community transport admin relig environ {
	preserve
		// Apply the cheap category filter first so that the merge below only
		// has to process this category's rows instead of the full dyad
		// table in every iteration. The two filters are independent, so the
		// resulting set of rows is the same in either order.
		keep if `x'_cat==1

		merge m:1 vote_id using `VS_Vote_p50', nogen keep(3) // keep only contentious votes

		drop vote_id vote vote_d // Drop unnecessary variables
		collapse (mean) samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent, by(session_id bill_id MP_id MP_id_d) // each bill gets the same weight, so collapse by bill first.
		drop bill_id
		collapse (mean) samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent, by(session_id MP_id MP_id_d) // now collapse to create a session-MP-pair dyads
		keep if MP_id < MP_id_d
		foreach var of varlist samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent {
			ren `var' `var'_`x'
		}
		tempfile voting_MP_dyad_`x'
		save `voting_MP_dyad_`x''
	restore
}

// Pair-session measures over all votes (no suffix). This collapse is done
// on the data in memory without preserve, so the vote-level dyads are gone
// afterwards.
local measures samevote?c* votediff?c voteSim?c* samevotePresent samePresentAbsent
drop vote_id vote vote_d
// stage 1: mean within bill, so that each bill gets the same weight
collapse (mean) `measures', by(session_id bill_id MP_id MP_id_d)
drop bill_id
// stage 2: mean across bills -> one row per session and MP pair
collapse (mean) `measures', by(session_id MP_id MP_id_d)
keep if MP_id < MP_id_d

// Variable labels: main outcomes
label variable voteSim4c "4 Cat vote similarity (3 = same vote, 0 = yes-no)"
label variable voteSim3c "3 Cat vote similarity (2 = same vote, 0 = yes-no)"
label variable samevote4c "Indicating same vote on 4 category voting"
label variable samevote3c "Indicating same vote on 3 category voting"
label variable samevotesc "Samevote = (yes-yes or no-no)"

// Variable labels: alternative outcomes
label variable samevotePresent "Samevote = (yes-yes or no-no or abstain-abstain)"
label variable samePresentAbsent "=1 if both present or both absent"
label variable voteSim4cFlip "4 Cat vote similarity, absent closer to No than abstain"
label variable samevote4cNoAbsent "samevote4c set to missing if either absent"

// Attach all subset-specific measures to the all-votes pair-session file.
// The tags are listed in the order the files are merged, which fixes the
// column order of the saved dataset:
//   contentious (p25, p50); half/busy for k=0 then k=1; bill types; bill topics
local subsets p25 p50 ///
	first0 busy0 first1 busy1 ///
	frv psk til brt rest ///
	industry foreign econ health law educ community transport admin relig environ
foreach s of local subsets {
	merge 1:1 session_id MP_id MP_id_d using `voting_MP_dyad_`s'', nogenerate
}

// Keep a permanent copy and a tempfile copy of the pair-session outcomes
save "`votingintfolder'/voting_MP_dyad.dta", replace
save `voteData'


*********************************************************************
// Prepare lead/current/lag
*********************************************************************	
// Build lagged and lead copies of the pair-session outcomes.
// Shifting session_id by +1 makes session s outcomes line up with session
// s+1 in the later merge (a lag); shifting by -1 gives the lead.
// Every outcome variable gets the suffix Lag or Lead.
foreach t in Lag Lead {
	local l = cond("`t'"=="Lag", 1, -1)
	use "`votingintfolder'/voting_MP_dyad", clear
	// re-saves the unshifted data to the voteData tempfile on every pass
	save `voteData', replace
	replace session_id = session_id + `l'
	foreach var of varlist samevote?c* votediff?c* voteSim?c* samevotePresent* samePresentAbsent* {
		rename `var' `var'`t'
	}
	save `voteData`t'', replace
}

*********************************************************************
// Prep MP info
*********************************************************************

// get MP info except yob
// Produces two copies of the MP-session covariates:
//   MPData   - keyed on (session_id, MP_id), original variable names
//   MPData_d - keyed on (session_id, MP_id_d), every covariate suffixed _d
// Forward slash in the path keeps the file portable across platforms.
use "`seatingfolder'/seating_MP_frontback.dta", clear
keep session_id MP_id seat_initRI0 gender party_id age ///
	random strata coalition ?_MP_idRI? numNeighborRI0 party ///
	male constituency constOrder everministerPast ///
	evercommitteechairLast sessExper wages expenses	left_right
save `MPData'

// Put the two key variables first and every covariate in one contiguous
// run, so the range seat_initRI0-left_right below covers exactly the
// covariates. The order list must name the same variables as the keep list.
order session_id MP_id seat_initRI0 gender male party_id age ///
	random strata coalition ?_MP_idRI? numNeighborRI0 party ///
	constituency constOrder everministerPast ///
	evercommitteechairLast sessExper wages expenses	left_right
foreach var of varlist seat_initRI0-left_right {
	ren `var' `var'_d
}
ren MP_id MP_id_d
save `MPData_d'


*********************************************************************
// Create MP dyadic form, merge data
*********************************************************************

// Get MP seating data
// (forward slash in the path keeps the file portable across platforms)
use "`seatingfolder'/seating_MP.dta", clear

// just keep basic vars
keep session_id MP_id

// create dyads: pair every MP with every MP of the same session, then keep
// each unordered pair once (this also drops self-pairs)
preserve
	ren MP_id MP_id_d
	save `holding'
restore
joinby session_id using `holding'
keep if MP_id < MP_id_d

// merge MP info for both members of the pair; pairs without a match in the
// MP info files are kept with missing covariates
merge n:1 session_id MP_id using `MPData', keep(1 3) nogen
merge n:1 session_id MP_id_d using `MPData_d', keep(1 3) nogen

// merge LHS variables: current, lagged and lead outcomes
// (tempfiles voteData, voteDataLag, voteDataLead); pairs without voting
// outcomes are kept
foreach t in "" "Lag" "Lead" {
	merge 1:1 session_id MP_id MP_id_d using `voteData`t'', keep(1 3) nogen
}

*********************************************************************
// Create RHS variables
*********************************************************************

// Neighbor treatment indicators, one set per RI index 0-2.
//   neighborRI    = MP_id_d is the U_ or L_ neighbor id of MP_id
//   neighborFBRI  = MP_id_d is the F_ or B_ neighbor id of MP_id
//   neighbor360RI = sum of the two
// NOTE(review): U/L/F/B presumably denote the seat positions around the
// MP (F/B likely front/back, given the source file name) - confirm.
forvalues i = 0/2 {
	generate neighborRI`i' = (U_MP_idRI`i'==MP_id_d) | (L_MP_idRI`i'==MP_id_d)
	generate neighborFBRI`i' = (F_MP_idRI`i'==MP_id_d) | (B_MP_idRI`i'==MP_id_d)
	generate neighbor360RI`i' = neighborRI`i'+neighborFBRI`i'
}

// Same-characteristic indicators and their interactions with every
// neighbor indicator (XsameVAR and XdiffVAR split the neighbor effect).
// NOTE(review): the == comparison also returns 1 when both values are
// missing - confirm that this is intended for pairs without MP info.
foreach var of varlist gender coalition party {
	generate same`var' = `var'==`var'_d

	foreach kind in RI FBRI 360RI {
		forvalues i = 0/2 {
			local nb neighbor`kind'`i'
			generate `nb'Xsame`var' = `nb'*same`var'
			generate `nb'Xdiff`var' = `nb'*(1-same`var')
		}
	}
}


// generate other similarity variables
foreach var of varlist constituency everministerPast evercommitteechairLast {
	g same`var' = `var'==`var'_d
}

// generate diff variables (absolute difference between the two MPs;
// missing whenever either MP's value is missing)
foreach var of varlist sessExper constOrder wages expenses age left_right {
	g `var'Diff = abs(`var' - `var'_d)
	la var `var'Diff "abs(`var' - `var'_d)"
}

// generate experience similarity variables and interactions
// "same" = experience differs by at most 5 (in units of sessExper).
// Stata treats missing as larger than any number, so an unguarded
// sessExperDiff>5 would code pairs with unknown experience as "different".
// Both indicators are therefore left missing when sessExperDiff is missing,
// matching how left_rightDiff is handled for the party-distance indicators.
g sameexperience = sessExperDiff<=5 if !mi(sessExperDiff)
g diffexperience = sessExperDiff>5 if !mi(sessExperDiff)
foreach x in RI FBRI 360RI {
	forv i=0/2 {
		// interactions inherit the missing value when experience is unknown
		g neighbor`x'`i'Xsameexperience = neighbor`x'`i'*sameexperience
		g neighbor`x'`i'Xdiffexperience = neighbor`x'`i'*(1-sameexperience)
	}
}


// generate ideological distance variables
// The cutoffs below are hard-coded; per the variable labels they are the
// median and tercile bounds of left_rightDiff among different-party pairs.
// They are not recomputed here, so they must be updated by hand if the
// underlying data change. The indicators themselves are defined for every
// pair with non-missing left_rightDiff (same-party pairs included).

* above vs. below-median (median taken among diffparty pairs)
g partyClose = left_rightDiff<2.8947 if !mi(left_rightDiff)
g partyFar = left_rightDiff>=2.8947 if !mi(left_rightDiff)
la var partyClose "=1 if left_rightDiff<2.8947 (median for diffparty pairs)"
la var partyFar "=1 if left_rightDiff>=2.8947 (median for diffparty pairs)"

* terciles (cutoffs taken among diffparty pairs)
g partyVClose = left_rightDiff<=2.3635 if !mi(left_rightDiff)
g partyMid = left_rightDiff>2.3635 & left_rightDiff<=3.3211 if !mi(left_rightDiff)
g partyVFar = left_rightDiff>3.3211 if !mi(left_rightDiff)
* labels state the same inequalities as the code (partyVClose is <=, not <)
la var partyVClose "=1 if left_rightDiff<=2.3635 (lowest tercile for diffparty pairs)"
la var partyMid "=1 if left_rightDiff>2.3635 & <=3.3211 (middle tercile for diffparty pairs)"
la var partyVFar "=1 if left_rightDiff>3.3211 (highest tercile for diffparty pairs)"


* interactions of each distance indicator with the neighborRI treatment
foreach var of varlist partyClose partyFar partyVClose partyMid partyVFar {
	forv i=0/2 {
		g neighborRI`i'X`var' = neighborRI`i'*`var'
	}
}

// generate age-diff and sessExper-diff interactions
foreach x in age sessExper {
	forv i=0/2 {
		g neighborRI`i'X`x'Diff = neighborRI`i'*`x'Diff
	}
}

// Pair identifiers: pairFE numbers each unordered MP pair
generate maxMP = max(MP_id,MP_id_d)
generate minMP = min(MP_id,MP_id_d)
egen pairFE = group(minMP maxMP)

// Pair strata: 1 = strata differ, 0 = both 0, 2 = both 1, missing otherwise
// NOTE(review): a pair in which exactly one strata value is missing counts
// as "differ" (=1) - confirm that this is intended.
generate pairStrata = cond(strata != strata_d, 1, cond(strata == 0, 0, cond(strata == 1, 2, .)))

// Speaker dummies: an MP is flagged as speaker when random==3
generate speaker = random==3
generate speaker_d = random_d==3
generate speaker_in_pair = speaker | speaker_d

// Special and short session dummies (hard-coded session ids)
generate specialSession = inlist(session_id,119,124,129,134,137,142)
generate shortSession = session_id==147

// First and last session indicators (hard-coded session ids)
generate firstSession = inlist(session_id,115,119,120,124,125,129,130,134,135,137,138,142,143,146,148)
label variable firstSession "special session or first proper session of term - should probably not use these sessions when lagged outcome"
generate lastSession = inlist(session_id,118,123,128,133,136,141,145,147)
label variable lastSession "last session of term - should probably not use these sessions when lead outcome"
generate leadShortIncomplete = inlist(session_id,146,148)
label variable leadShortIncomplete "next session is short or incomplete - should probably not use these sessions when lead outcome"

// The neighbor id variables (and their _d copies) are no longer needed
drop U_* L_* F_* B_*

// Save the final pair-session analysis file
save "`votinganalyfolder'/dyads_voting", replace


*********************************************************************
// Beep and report computing time
*********************************************************************

// Audible signal that the run is finished: three beeps, one second apart
forvalues b = 1/3 {
	beep
	if `b' < 3 {
		sleep 1000
	}
}

// Stop timer 1 (started at the top of the script) and print elapsed time
timer off 1
timer list
